con <- DBI::dbConnect(RPostgres::Postgres(),dbname = "wefarmtestdb",user = rstudioapi::askForPassword("Database user"),password = rstudioapi::askForPassword("Database password"),port = "5432")
dbListTables(con)
## [1] "target_group" "target_group_user" "impression"
## [4] "member" "member_post"
SELECT
Content,
impression.keyword,
COUNT(*) AS click_through_value
FROM
target_group
INNER JOIN impression
ON target_group.keyword = impression.keyword
GROUP BY content,impression.keyword
ORDER BY impression.keyword;
print(click_through_value)
## content
## 1 Wefarm online is here! Join farming groups, connect with other farmers & share tips to help each other succeed. Join now we.fm/sms/i1
## 2 Be known as a model farmer in your area of farming by answering your fellow farmers' questions on Wefarm online. Join now we.fm/sms/i3
## 3 Farming is hard work! We need to help each other succeed. Wefarm online is a farmer-only community connecting farmers from all over Kenya. Join now we.fm/sms/i5
## 4 What did you do on your farm today? share your daily activities on Wefarm online to inspire others & to keep track of your progress. Join now we.fm/sms/i6
## keyword click_through_value
## 1 /sms/i1 28
## 2 /sms/i3 22
## 3 /sms/i5 33
## 4 /sms/i6 26
SELECT
target_group_id,
COUNT (*) AS join_value
FROM target_group_user
INNER JOIN member
ON user_phone_number = member_phone_number
WHERE target_group_id
IN(1,3,5,6)
GROUP BY target_group_id;
print(join_value)
## target_group_id join_value
## 1 1 9
## 2 3 1
## 3 5 3
## 4 6 1
SELECT
target_group_id,
COUNT(*) AS contribution_value
FROM member_post
INNER JOIN target_group_user
ON member_post.member_phone_number = target_group_user.user_phone_number
WHERE target_group_id
IN(1,3,5,6)
GROUP BY target_group_id;
print(contribution_value)
## target_group_id contribution_value
## 1 5 1
SELECT
target_group_id,
COUNT (*) AS target_number
FROM target_group_user
WHERE target_group_id
IN(1,3,5,6)
GROUP BY target_group_id
ORDER BY target_group_id;
click_through_df <- rbind("Target number" =target_group_user$target_number,"Click value"=click_through_value$click_through_value,"Join value" =join_value$join_value)
colnames(click_through_df) <- c("Variation 1", "Variation 3" , "Variation 5", "Variation 6")
click_through_rate <- mapply('/', click_through_df[2,],click_through_df[1,])*100
click_through_rate
## Variation 1 Variation 3 Variation 5 Variation 6
## 1.866667 1.465690 2.198534 1.733333
join_rate <- mapply('/', click_through_df[3,],click_through_df[2,])*100
join_rate
## Variation 1 Variation 3 Variation 5 Variation 6
## 32.142857 4.545455 9.090909 3.846154
click_join_df <- as.data.frame(t(rbind (click_through_rate, join_rate)))
click_join_df <- rownames_to_column(click_join_df, var="Variation") %>% head
click_join_df_long <- gather(click_join_df, key="Measure", value="Value", c("click_through_rate", "join_rate"))
plot1 <- click_join_df_long %>%
arrange(Value) %>%
mutate(Variation=factor(Variation, levels=rev(unique(Variation)), ordered=TRUE)) %>%
ggplot( aes(fill=Measure, x=Variation, y=Value))+
geom_bar(position = "dodge", stat = "identity", width=0.4) +
ggtitle("Click Through Rate vs Join Rate")+
xlab("Test Group")+
ylab("Rate")
ggplotly(plot1)
From the plot, we see that variation 5 (2.1985) has a better click through rate than variation 1 (1.86667). We also see that variation 1 (32.143) has the best join rate from the sample.
Next we are going to test the null hypothesis i.e the generic message (variation 1) would have had a lower click through rate and join rate than the user-based value propositions (other variations in the test), then further test for the statistical significance of the result.
chisq.test(click_join_df$join_rate,click_join_df$click_through_rate)
## Warning in chisq.test(click_join_df$join_rate,
## click_join_df$click_through_rate): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: click_join_df$join_rate and click_join_df$click_through_rate
## X-squared = 12, df = 9, p-value = 0.2133
A chi-squared test statistic at 95% confidence level returns p-values for both click rate and join rate that are greater than 0.05 (i.e 0.2133 > 0.05). This indicates that there isnโt enough evidence to to reject the null hypothesis thus we fail to reject the null hypothesis and conclude that the test result is not statistically significant at this point.